**********************************************************************
*Plot AHA data 
**********************************************************************
*File options 
local N 500 // N bootstraps

*I. MAP ACQUIRED HOSPITALS 
*Figures 2, A2
	foreach is in is ss {
	
	*Prepare data 
	*Assumes the construction of a hospital-year level episode file, which contains 
	*spending summary variables and has been merged to American Hospital Association 
	*(AHA) IDs and mergers. These processing steps are described in Appendix A1, A3. 
	use "$reg\hospreg_FULL0_all1", clear
	keep *num* ye vol stcd 
	merge 1:1 num ye using "$reg\FULLsample`is'", keepusing(incl_1`is' never wt_cemm)
	keep if _m==3 
	drop _m 	
	
	*aha_vol_clean contains latitude and longitude variables from the AHA data.
	merge 1:1 old_num ye using "$aha\vol\aha_vol_clean", keepusing(lat long_)
	keep if _m==3
	drop _m old_num	
	foreach var in lat long_ {
	ren `var' `var'2
	bysort num: gegen `var' = mean(`var'2)
	drop `var'2
	}
	
	*Limit to matched hospitals in continental US
	drop if stcd==94 | stcd==95 | stcd < 11
	keep if wt_cemm > 0 			
	foreach var in incl_1`is' never {
	bysort num: gegen has_`var' = max(`var')
	}
	keep num has_* lat long_ 
	gduplicates drop	
	isid num
	tempfile hosplocvisuals
	save `hosplocvisuals', replace 
	
	*Public state shape files obtained from census.gov
	use "$aha\statecoordfile", clear
	merge m:1 _ID using "$aha\statefile"
	drop _m
	drop if inlist(NAME,"Alaska","Hawaii","Puerto Rico")
	merge using `hosplocvisuals'
	
	*Map 
	if "`is'"=="is" {
	twoway (line _Y _X,lwidth(vthin) cmiss(n) lcolor(gs12))||(scatter lat long_ if has_never==1, mcolor("0 204 0 %50") msize(tiny))||(scatter lat long_ if has_incl_1`is'==1, mcolor("51 51 204") msymbol(triangle) msize(vsmall)), xla(-124/-71) yla(,nogrid) ysc(off) xsc(off) graphr(fc(white)) legend(order(2 "{stSerif:Never acquired}" 3 "{stSerif:Acquired}") nobox region(lstyle(none)) cols(1) ring(0) bplacement(sw))
	graph export "$op\is_never.png", replace
	}
	if "`is'"=="ss" {
	twoway (line _Y _X,lwidth(vthin) cmiss(n) lcolor(gs12))||(scatter lat long_ if has_incl_1`is'==1, mcolor("51 51 204") msymbol(triangle) msize(vsmall)), xla(-124/-71) yla(,nogrid) ysc(off) xsc(off) graphr(fc(white)) legend(order(2 "{stSerif:Previously acquired}" 3 "{stSerif:Acquired (non-corporatization)}") nobox region(lstyle(none)) cols(1) ring(0) bplacement(sw))||(scatter lat long_ if has_never==1, mcolor("0 204 0 %50") msize(tiny))
	graph export "$op\ss_prev.png", replace
	}	
	}		

*II. DESCRIPTIVE TRENDS 
*Figure 1, A1
	*Prepare AHA data 
	{
	*Use main AHA file (2012-2018)
	*aha_merge6_1.dta contains hospital-year level data from the AHA survey and 
	*identified mergers, described in Appendix A1, A3. 
	use "$aha\aha_merge6_1", clear
	
	*Limit to GAC hospitals 
	drop if hosp_cntrl=="fed"
	keep if hosptype=="gac"
	replace sysid = . if sysid==num 
	replace sysid = . if sysid>10000
	
	*Merge to extended AHA file 
	*ahaextended contains AHA variables from 2000-2020.
	keep ye id hrrcode bdtot sysid ofint mloczip stcd fte 
	ren sysid sysid2 
	merge 1:1 id ye using "$aha/ahaextended", keepusing(hrrcode bdtot sysid ofint mloczip fte exptot paytot npayben stcd)
	drop _m 
	replace sysid2 = sysid if missing(sysid2)
	drop sysid
	ren sysid sysid 
	gen syso = !missing(sysid)
	gen indep = !syso
	gen zip = substr(mloczip,1,5)
	drop mloczip
	
	*Replace system id with hospital id if missing (independent)
	ren id id_ 
	gegen   id     = group(id)
	replace id     = id + 10000
	gen     sysid2 = sysid
	replace sysid2 = id if missing(sysid)
	drop sysid id_
	
	*Fill in HRRcodes
	sort id ye
	forval i = 1/10 {
	replace hrrcode = hrrcode[_n-`i'] if id==id[_n-`i'] & missing(hrrcode)
	replace hrrcode = hrrcode[_n+`i'] if id==id[_n+`i'] & missing(hrrcode)
	}
	sort zip ye
	forval i = 1/50 {
	replace hrrcode = hrrcode[_n-`i'] if zip==zip[_n-`i'] & missing(hrrcode)
	replace hrrcode = hrrcode[_n+`i'] if zip==zip[_n+`i'] & missing(hrrcode)
	}
	drop if stcd < 10
	drop if missing(hrrcode)
	}
	
	*labor share 
	*Figure A1(b)
	{
	ren paytot paytot2
	gen paytot = paytot2 + npayben
	drop paytot2 	
	gen pay_opex = paytot / exptot 
	sum pay_opex, d 
	
	foreach var in pay_opex {
	qui sum `var', d
	replace `var' = . if `var' < r(p5) & !missing(`var')
	replace `var' = . if `var' > r(p95) & !missing(`var')
	}	
	tab ye, sum(pay_opex)
	pause 
	}
		
	*System owned share of ftes and beds 
	*Figure 1(a)
	{
	preserve 
	keep ye fte syso 
	foreach var in fte {
	bysort ye: gegen tot_`var' = sum(`var')
	replace `var' = 0 if syso==0 
	bysort ye: gegen sys_`var' = sum(`var')
	replace sys_`var' = sys_`var' / tot_`var'
	}
		
	keep ye sys_* 
	gduplicates drop 
	gduplicates report ye 
	tab ye, sum(sys_fte)
	pause
	restore
		
	preserve
	bysort indep ye: gegen sh = sum(bdtot) 
	bysort ye: gegen n = sum(bdtot)
	replace sh = sh / n
	keep indep ye sh
	gduplicates drop 
	tab ye if !indep, sum(sh)
	pause
	restore
	}

	*HRRs with no indep hospitals 
	*HRRs where top 2 systems have 50%+ share
	*Market concentration (HHI) 
	*Figures 1(b), 1(c)
	{
	*top 2 systems' share
	bysort hrrcode ye: 		  gegen totbeds = sum(bdtot)
	bysort hrrcode ye sysid2:	  gegen sysbeds = sum(bdtot)
	replace sysbeds = (sysbeds / totbeds)
	
	preserve
	keep hrrcode ye sysbeds sysid2 syso
	gduplicates drop 
	gduplicates report hrr ye sysid2
	keep if syso==1
	gsort hrrcode ye -sysbeds 
	bysort hrrcode ye: gen ct = _n
	
	gen top2 = ct==1 | ct==2
	gen sysbeds3 = sysbeds if top2==1
	bysort hrrcode ye: 		  gegen top2sys_sh = sum(sysbeds3)
	keep hrrcode ye top2sys_sh
	gduplicates drop 
	tempfile topsys 
	save `topsys', replace	
	restore
	
	merge m:1 hrrcode ye using `topsys', keepusing(top2sys_sh)
	replace top2sys_sh = 0 if missing(top2sys_sh)
	drop if _m==2
	drop _m
		
	*market concentration 
	replace sysbeds = sysbeds*sysbeds 
	sum sysbeds, d	
	sort hrrcode ye sysid2
	bysort hrrcode ye sysid2: gen ct = _n
	replace sysbeds = . if ct!=1
	bysort hrrcode ye: gegen hhi = sum(sysbeds)
	sum hhi,d
	drop sysbeds ct totbeds
	
	*HRRs with no independent hospitals 
	bysort hrr ye: gegen n_indep = sum(indep)
	
	preserve
	keep hrr ye n_indep top2sys_sh hhi
	gduplicates drop
	gduplicates report hrr ye
	gen no_indeps  = n_indep==0
	*HRRs where top 2 systems have 50%+ share
	gen top2sys_50 = top2sys_sh > .5  
	tab ye, sum(no_indeps)
	tab ye, sum(top2sys_50)
	tab ye, sum(hhi)
	pause
	restore
	}
	
*III. DESCRIPTIVE SCATTER PLOTS 
*Figure 1 
	*Prepare data 
	{
	*System size (GAC hospitals)
	use "$aha\aha_merge6_1", clear
	keep if hosptype=="gac"
	drop if hosp_c=="fed"
	bysort sysid ye: gegen n_hosps    = count(num) 
	
	*Limit to hospitals in Elevance data 
	keep if ofint==1
	keep sysid num ye n_hosps exptot bdtot hrr
	merge 1:1 num ye using "$reg\hospreg_FULL0_all1", keepusing(num vol cost17 tot_drgwt `ptntcontrol')
	keep if _m==3
	drop _m
	*Drop if insufficient obs 
	drop if vol < 15	
	
	*Normalize expenses
	bysort num: gegen minye = min(ye)
	gen temp = bdtot if ye==minye
	bysort num: gegen bdtot_first = max(temp)
	drop temp minye bdtot 
	replace exptot = exptot / bdtot_first
	replace exptot = exptot / 1000 

	*Keep first years of data 
	keep if year==2012 | year==2013
	ren n_hosp n_hosp2
	bysort num: gegen n_hosp = mean(n_hosp2)
	bysort num: gegen sd     = sd(n_hosp2)
	replace sd = 0 if missing(sd)
	drop n_hosp2
	
	*Exclude hospitals acquired in 2013
	xtset num ye
	gen flag2 = L1.sysid!=sysid & !missing(L1.sysid)
	bysort num: gegen flag = max(flag2)
	drop if flag
	drop flag*
	keep if ye==2012
	gduplicates drop
	gduplicates report num		
	
	*System size outliers
	sum n_hosp, d
	replace n_hosp = . if n_hosp >= r(p95)
	drop if missing(n_hosp)

	*Bootstrap: sample providers N times, saving weights.
	set seed 1234
	forval i = 1/`N' {
	gen bwt_`i' = 0 
	bsample, cluster(num_prv) weight(bwt_`i') 
	}
	qui compress 
	save "$reg\aha_motiv", replace	
	
	*Adult readmissions
	*Assumes the construction of an episode level cardiac file, which contains 
	*readmission variables and has been merged to American Hospital Association 
	*(AHA) IDs and mergers. These processing steps are described in Appendix A1, A3. 
	use "$reg\cardio_inp_ptnt_0_r1", clear	
	foreach i in 90 {
	replace p_read`i'_inp = . if transfer==1
	}
	drop if missing(p_read90_inp) | missing(tot_drgwt)
	keep if age >= 18
	ren p_read90_inp reads
	
	*Keep first years of data 
	keep if ye==2012 | year==2013
	merge m:1 num ye using "$reg\aha_motiv"
	bysort num: gegen has_merge = max(_m)
	keep if has_merge==3
	drop _m has_m
	
	*Collapse, merge in bootstrap weights
	gcollapse (count) indexd (mean) `dv' tot_drgwt `ptntcontrol' sysid n_*, by(num_prv ye)
	merge m:1 num using "$reg\aha_motiv", keepusing(bwt*)	
	keep if _m==3
	drop _m 
	qui compress 
	save "$reg\aha_motiv_reads", replace	
	}
	
	*Residualize & plot 
	foreach dv in reads cost exp {
	
	*Bootstrapped residuals (1st stage)
	if "`dv'"!="reads" {
	use "$reg\aha_motiv", clear	
	reg `dv' tot_drgwt `ptntcontrol' 
	predict _resid, resid
	
	forval i = 1/`N' {
	di "`i'"
	qui reg `dv' tot_drgwt `ptntcontrol' [fw=bwt_`i']
	qui predict resid`i', resid
	}	
	keep num *resid* sysid vol n_* bwt*
	}
	
	if "`dv'"=="reads" { 
	use "$reg\aha_motiv_reads", clear
	reg `dv' tot_drgwt `ptntcontrol' 
	predict _resid, resid
	replace _resid = _resid * 100
	
	forval i = 1/`N' {
	qui reg `dv' i.year tot_drgwt `ptntcontrol' [fw=bwt_`i']
	qui predict resid`i', resid
	replace resid`i' = resid`i' * 100
	}
	
	gcollapse (count) indexd (mean) *resid* bwt_* n_*, by(num_prv)
	ren indexd vol 
	}
		
	*Outliers
	drop if missing(n_hosp)
	qui sum _resid, d
	replace _resid = r(p95) if _resid > r(p95) & !missing(_resid)
	replace _resid = r(p5)  if _resid < r(p5)  & !missing(_resid)
	gen _sdy = r(sd)
	
	forval i = 1/`N' {
	qui sum resid`i' [fw=bwt_`i'], d
	replace resid`i' = r(p95) if resid`i' > r(p95) & !missing(resid`i')
	replace resid`i' = r(p5)  if resid`i' < r(p5)  & !missing(resid`i')
	gen sdy`i' = r(sd)
	}
	
	*Bootstrapped slopes (2nd stage)
	replace _resid = _resid / _sdy * 100
	reg _resid n_hosp 
	predict _pred_`dv'
	local slope = round(_b[n_hosp],.01)
	
	sort num 
	gen slope = .
	forval i = 1/`N' {
	qui replace resid`i' = resid`i' / sdy`i' * 100
	qui reg resid`i' n_hosp [fw=bwt_`i']
	qui replace slope = _b[n_hosp] in `i'
	}
	sum slope, d	
	local se    = round(r(sd),.01)
	drop resid* *sdy* *slope bwt*
	
	*Plot 
	if "`dv'"=="cost" {
	local ylab "Price residual (% of s.d.)"
	}
	if "`dv'"=="reads" {
	local ylab "Readmissions residual (% of s.d.)"
	}
	if "`dv'"=="exp" {
	local ylab "Operating expense residual (% of s.d.)"
	}	
	local xtitle "Size (n. hospitals)"
	
	xtile bin_size1 = n_hosp, nq(10)
	xtile bin_size2 = n_hosp, nq(15)
	collapse (mean) _pred_`dv' _resid n_hosp vol, by (bin_size1)
	
	sum _resid, d
	local yspot1 = r(p90)
	local yspot2 = r(p10)
	sum n_,d
	local xspot = r(p50)
	if "`dv'"=="cost"  {
	local yspot1 = `yspot2'+15
	}
	if "`dv'"=="exp"  {
	local yspot1 = `yspot2'+30
	}
	
	*Format slope 
	local s "0`slope'"
	if `slope' < 0 {
	gen s = abs(`slope')
	qui sum s
	local s = round(r(mean), .01)
	local s "-0`s'"
	drop s
	}
	
	twoway (line _pred_`dv' n_hosp, lc(black)) (scatter _resid n_hosp, msym(Oh) mcol(blue*1.5) graphregion(color(white)) xtitle(`xtitle',size(large)) ytitle("`ylab'", size(large)) yla(#5, ang(0)) xla(#6) legend(off) text(`yspot1' `xspot' "Slope: `s' (0`se')", place(e) size(medlarge)) ysize(3.13) xsize(5) xlabel(,labsize(large)) ylabel(,labsize(large)) xscale(log)) 
	graph export "$op\motiv_`dv'_size.png", replace width(2000)
	}
